library(tidycensus)
library(tidyverse)
## -- Attaching packages -------------------------------------- tidyverse 1.3.0 --
## √ ggplot2 3.2.1 √ purrr 0.3.3
## √ tibble 2.1.3 √ dplyr 0.8.4
## √ tidyr 1.0.2 √ stringr 1.4.0
## √ readr 1.3.1 √ forcats 0.4.0
## -- Conflicts ----------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(janitor)
##
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(readxl)
library(ggrepel)
library(mapview)
library(tigris)
## To enable
## caching of data, set `options(tigris_use_cache = TRUE)` in your R script or .Rprofile.
##
## Attaching package: 'tigris'
## The following object is masked from 'package:graphics':
##
## plot
library(sf)
## Linking to GEOS 3.6.1, GDAL 2.2.3, PROJ 4.9.3
# Turn on tigris caching for the rest of the session.
options(tigris_use_cache = TRUE)

# Variable lookup table for the 2010 decennial "sf1" dataset.
v10d_sf1 <- load_variables(
  year = 2010,
  dataset = "sf1",
  cache = TRUE
)

# Block-level pull of variable P012001 for King County, WA, with geometry.
# NOTE(review): `test` is not referenced again in the visible script.
test <- get_decennial(
  geography = "block",
  variables = "P012001",
  year = 2010,
  state = "WA",
  county = "King",
  geometry = TRUE
)
## Getting data from the 2010 decennial Census
Set up the table IDs
# Variable ids P012003 through P012025; the numeric suffix is zero-padded
# to three digits.
guyvars <- sprintf("P012%03d", 3:25)
# galvars
# Ids of the two summary variables kept alongside `guyvars`.
guy_sumvars <- c("P012001", "P012002")
# gal_sumvars
Get the data
# Tract-level values for every id in `guyvars` in King County, WA, with
# P012001 attached as the summary variable and tract geometry included.
king_guys <- get_decennial(
  geography = "tract",
  variables = guyvars,
  summary_var = "P012001",
  year = 2010,
  state = "WA",
  county = "King",
  geometry = TRUE
)
## Getting data from the 2010 decennial Census
Clean that data
# Attach the SF1 variable labels to the tract values and derive numeric age
# bounds (`min_age`, `max_age`) from the label text.
king_guys2 <- king_guys %>%
  # Drop `concept` from the lookup so the join only adds `label`.
  left_join(select(v10d_sf1, -concept), by = c("variable" = "name")) %>%
  # Labels are "!!"-delimited; split them into their three parts.
  separate(label, into = c("count_type", "sex", "age"), sep = "!!") %>%
  mutate(
    # "Under 5 years" excludes age 5, so the inclusive band is 0 to 4.
    # Writing it as "0 to 5" would overlap the "5 to 9 years" band and put
    # max_age off by one.
    age = str_replace(age, "Under 5 years", "0 to 4 years"),
    # Any other "Under ..." label keeps the generic "0 to ..." rewrite.
    age = str_replace(age, "Under", "0 to"),
    # Open-ended top band gets an explicit upper bound of 100.
    age = str_replace(age, "years and over", "to 100 years"),
    # First number in the label is the lower bound.
    min_age = as.integer(str_extract(age, "\\d+")),
    # The number immediately before the trailing "years" is the upper bound.
    max_age = as.integer(
      str_extract(age, "\\d+ years$") %>%
        str_extract("\\d+")
    )
  ) %>%
  clean_names()
Quick peek
# Quick look: one bar per census tract, one facet per age band.
king_guys2 %>%
  mutate(
    # Pull the tract number out of the full geography name.
    census_tract = str_extract(name, "\\d+\\.?\\d?\\d?"),
    # Order the age bands by their lower bound. As plain strings they sort
    # alphabetically, which puts "5 to 9 years" after "45 to 49 years".
    age = fct_reorder(age, min_age)
  ) %>%
  ggplot(aes(x = census_tract, y = value)) +
  geom_col() +
  facet_wrap(~age, nrow = 1) +
  coord_flip()
# Saves the plot built above (ggsave defaults to the last plot).
ggsave("king_county_dudes_by_age.pdf", width = 24, height = 36)
# Variable id that is requested below under the name "median_age".
median_age_both_sex <- "P013001"

# Tract-level pull of that variable for King County, WA, with geometry.
king_mage <- get_decennial(
  geography = "tract",
  variables = c(median_age = median_age_both_sex),
  year = 2010,
  state = "WA",
  county = "King",
  geometry = TRUE
)
## Getting data from the 2010 decennial Census
# Tidy the column names and split the geography name into tract / county /
# state. Plain assignment is used instead of `%<>%`: that operator lives in
# magrittr, which this script never attaches.
king_mage <- king_mage %>%
  clean_names() %>%
  # Split on the comma and any following whitespace so `county` and `state`
  # do not keep a leading space.
  separate(name, into = c("tract", "county", "state"), sep = ",\\s*") %>%
  mutate(
    # Keep only the trailing tract number (optionally with decimals).
    tract = str_extract(tract, "\\d+\\.?\\d?\\d?$"),
    county = str_remove(county, " County")
  )
Figure out which census tracts belong to Seattle
# Read the Seattle census-block / neighborhood correlation workbook.
seattle_data <- read_excel(
  "SeattleCensusBlocksandNeighborhoodCorrelationFile.xlsx"
)

# Keep the neighborhood columns and build a `tract` key as text.
seattle_tracts <- seattle_data %>%
  clean_names() %>%
  select(
    tract_10,
    urban_village_name,
    cra_name,
    neighborhoods_included,
    neighborhood_district_name
  ) %>%
  mutate(
    # Insert a decimal point before the last two digits of the tract code,
    # then strip a ".00" that follows the leading digits.
    tract = as.character(tract_10) %>%
      str_replace("(\\d+)(\\d\\d)$", "\\1\\.\\2") %>%
      str_replace("(\\d+)\\.00", "\\1")
  ) %>%
  # Move `tract` to the front and drop the raw code.
  select(tract, everything(), -tract_10) %>%
  filter(tract != "9901") # this is water around Vashon Island
Combine tables
# Keep only the King County tracts that also appear in `seattle_tracts`.
seattle_mage <- semi_join(king_mage, seattle_tracts, by = "tract")

# Dot plot of `value` per tract, tracts ordered by `value`.
ggplot(seattle_mage, aes(y = value, x = fct_reorder(tract, value))) +
  geom_point() +
  coord_flip()

# Distribution of `value` across those tracts, in bins of width 2.
ggplot(seattle_mage, aes(x = value)) +
  geom_histogram(binwidth = 2)
# One row per distinct (tract, cra_name) pair; the count itself is not needed.
sea_tracts_flat <- seattle_tracts %>%
  count(tract, cra_name) %>%
  select(-n)

# Attach `cra_name` to the King County tracts and keep only rows that got one.
sea_neighs_mage <- king_mage %>%
  left_join(sea_tracts_flat, by = "tract") %>%
  filter(!is.na(cra_name))
# Dot plot labelled "tract: cra_name", ordered by `value`; tracts 86, 87
# and 90 get a different colour. The legend is hidden.
ggplot(
  sea_neighs_mage,
  aes(
    y = value,
    x = fct_reorder(paste0(tract, ": ", cra_name), value),
    col = tract %in% c(86, 87, 90)
  )
) +
  geom_point() +
  coord_flip() +
  theme(legend.position = "none")

# Same ordering by tract, with `cra_name` printed next to each point; the
# text justification depends on whether `value` is above the median.
ggplot(
  sea_neighs_mage,
  aes(y = value, x = fct_reorder(tract, value), label = cra_name)
) +
  geom_point() +
  geom_text(aes(hjust = if_else(value > median(value), 1.1, -0.1))) +
  coord_flip()

# Variant that lets ggrepel place the labels.
ggplot(
  sea_neighs_mage,
  aes(y = value, x = fct_reorder(tract, value), label = cra_name)
) +
  geom_point() +
  geom_text_repel() +
  coord_flip()

# Static map with tract polygons filled by `value`.
ggplot(sea_neighs_mage, aes(fill = value)) +
  geom_sf() +
  scale_fill_viridis_c()

# Interactive version of the same map.
mapview(sea_neighs_mage, zcol = "value", legend = TRUE)
# Same variable id as before, requested under the name "median_age".
median_age_both_sex <- "P013001"

# Tract-level pull with geometry, this time keeping the extra geography
# columns (`keep_geo_vars = TRUE`).
king_mage2 <- get_decennial(
  geography = "tract",
  variables = c(median_age = median_age_both_sex),
  year = 2010,
  state = "WA",
  county = "King",
  geometry = TRUE,
  keep_geo_vars = TRUE
)
## Getting data from the 2010 decennial Census
Erase the water areas from the tract polygons so the map looks nicer.
# Water features for King County, WA, as an sf object.
king_water <- area_water("WA", "King", class = "sf")

# Combine the water features into one geometry, union it, and subtract the
# result from the tract polygons.
king2_erased <- st_difference(
  king_mage2,
  king_water %>%
    st_combine() %>%
    st_union(by_feature = TRUE)
)
## although coordinates are longitude/latitude, st_difference assumes that they are planar
## Warning: attribute variables are assumed to be spatially constant throughout all
## geometries
# Tidy the water-erased tracts: drop the extra geography columns, keep
# `name_y` as `name`, and split it into tract / county / state.
king_mage2_clean <- king2_erased %>%
  clean_names() %>%
  select(-geo_id, -state, -county, -tract, name = name_y, -name_x, -lsad, -countyfp, -statefp, -geoid) %>%
  # Split on the comma and any following whitespace so `county` and `state`
  # do not keep a leading space.
  separate(name, into = c("tract", "county", "state"), sep = ",\\s*") %>%
  mutate(
    # Keep only the trailing tract number (optionally with decimals).
    tract = str_extract(tract, "\\d+\\.?\\d?\\d?$"),
    county = str_remove(county, " County")
  )
# Attach `cra_name` to the water-erased tracts and keep only the rows that
# matched a row of `sea_tracts_flat`.
sea_neighs_mage2 <- left_join(
  king_mage2_clean,
  sea_tracts_flat,
  by = "tract"
) %>%
  filter(!is.na(cra_name))
# Static map of the tracts that matched a `cra_name`, filled by `value`.
ggplot(sea_neighs_mage2, aes(fill = value)) +
  geom_sf() +
  scale_fill_viridis_c() +
  theme_bw() +
  labs(fill = "age")

# Same map for every King County tract.
ggplot(king_mage2_clean, aes(fill = value)) +
  geom_sf() +
  scale_fill_viridis_c() +
  theme_bw() +
  labs(fill = "age")

# Interactive versions of both maps.
mapview(sea_neighs_mage2, zcol = "value", legend = TRUE)
mapview(king_mage2_clean, zcol = "value", legend = TRUE)